#!/usr/bin/env python

import nltk
from nltk import FreqDist
from nltk.corpus import brown 

# ------------- part A
# for cate in brown.categories():
# 	print cate, brown.words(categories=cate)[:3]


# ------------- part B
# wwwwww =['what','how','when','who','why']
# news_text = brown.words(categories='news')
# fdist = FreqDist([w.lower() for w in news_text])
# for w in wwwwww:
# 	print w+':',fdist[w]


# ------------- part C

# Count modal verbs per genre in the Brown corpus and print the result as a
# genre-by-modal table.
genres = ['news', 'religion', 'hobbies', 'science_fiction', 'romance', 'humor']
modals = ['can', 'could', 'may', 'might', 'must', 'will']

# Only the genres that get tabulated below are scanned; reading every Brown
# category would add work without changing the printed table.
# Each (genre, word) pair adds one count for `word` under condition `genre`.
# Words are counted exactly as they appear (no lower-casing), so capitalised
# forms such as "Can" or "Will" are not part of the modal counts.
cfd = nltk.ConditionalFreqDist(
    (genre, word)
    for genre in genres
    for word in brown.words(categories=genre)
)

# One row per genre, one column per modal verb.
cfd.tabulate(conditions=genres, samples=modals)
